# packages
library(ggplot2)
library(here)
## here() starts at C:/Users/Jonathan/Desktop/unimelb/PhD/missNetsSimulations
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# true estimates
resultsFiles = list.files(path = here("Output", "20230726_missNetsTrueModels"))
# initialise list
trueCoefList = list()
trueSeList = list()
# index for how missingness is saved
missSaveLabel = c("Peter", "Todd")
missSaveValue = c("NA", "0")
# TODO:: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO:: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# loop to load model parameters
for(netInd in 1:6){
# load in results for each network
load(here("Output", "20230726_missNetsTrueModels", resultsFiles[[netInd]]))
# and take the coefficients only
trueCoefList[[netInd]] = coef(modelres)
trueSeList[[netInd]] = summary(modelres)$coefficients[,2]
}
## Warning: This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## This object was fit with 'ergm' version 3.11.0 or earlier. Summarizing it with version 3.12 or later may return incorrect results or fail.
## Warning: This object was fit with 'ergm' version 4.2.2 or earlier. Summarizing
## it with version 4.3 or later may return incorrect results or fail.
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", chosenMissLabel))
# set up some lists to put in the ergm re-estimations
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# hmm... what's the best way to handle this....
# I have specific indices,
# but there are different amounts of each file because re-estimations tend to fail
# especially for some combinations
# some regular expressions
# net1Ind = grep("net1", outputList)
# missMod2Ind = grep("missModel2", outputList)
#
# # check which indices overlap
# net1MissMod2 = missMod2Ind[missMod2Ind %in% net1Ind]
# set a specific index for the network
chosenNetInd = 2
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20230814_missNetReest_net", chosenNetInd, sep =""), outputList)
##### For the MNAR files (coefSet1), the Miss Model is always 2... so what I can do is:
## regex the MNAR files
chosenMnarOutFiles = grep(paste("20231011_missErgmNetReest_Miss", chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propMiss is fine (or should be.)
# regex to get the first number after the specified text
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# plotting
# note: I think I should have a plot for each missingness proportion
chosenProp = 1
# there should only be 3 miss props, 1 for 10%, 3.5 for 35%, and 6 for 60%
# subset the lists to only be a single chosen proportion
chosenPropIndex = propMissList == chosenProp
modelResChosenPropList = modelResList[chosenPropIndex]
modelSeChosenPropList = modelSeList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
## AltStar
# making the dataset
altStarPlotData = data.frame(altStar = c(unlist(lapply(modelResChosenPropList, `[[`, 2) )),
SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 2))),
missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm")))
# order the plot data
altStarPlotOrd = altStarPlotData[order(altStarPlotData[,"altStar"]),]
# one more ordering with the missing model type
altStarPlotOrd = altStarPlotOrd[order(altStarPlotOrd[,"missModel"]),]
# getting the true altStar value
trueAltStar = trueCoefList[[chosenNetInd]][2]
trueAltStarSE = trueSeList[[chosenNetInd]][2]
# caterpillar?
altStarPlot = ggplot( data = altStarPlotOrd,
aes( x = 1:nrow(altStarPlotOrd), y = altStar, col = missModel)) +
geom_errorbar(aes (ymin = (altStar - 1.96*SE), ymax = (altStar + 1.96*SE), width = 0.4)) +
xlab("") +
ylab("AltStar estimate (95% Confidence Int)") +
geom_hline(yintercept = trueAltStar, col = "darkblue") +
geom_hline(yintercept = 0, col = "black", lty = 2) +
geom_point() +
ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) +
theme_classic() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
# legend.position = "none") +
geom_rect(aes(xmin = -Inf,
xmax = Inf,
ymin = (trueAltStar - 1.96*trueAltStarSE),
ymax = (trueAltStar + 1.96*trueAltStarSE)),
alpha = 0.01,
colour = NA,
fill = "grey") +
ylim(min(altStarPlotData$altStar) - 2*max(abs(altStarPlotData$SE)), max(altStarPlotData$altStar) + 2*max(abs(altStarPlotData$SE)))
altStarPlot
## Gwesp
# making the dataset
gwespPlotData = data.frame(gwesp = c(unlist(lapply(modelResChosenPropList, `[[`, 3) )),
SE = c(unlist(lapply(modelSeChosenPropList, `[[`, 3))),
missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm")))
# order the plot data
gwespPlotOrd = gwespPlotData[order(gwespPlotData[,"gwesp"]),]
# one more ordering with the missing model type
gwespPlotOrd = gwespPlotOrd[order(gwespPlotOrd[,"missModel"]),]
# getting the true gwdeg value
truegwesp = trueCoefList[[chosenNetInd]][3]
truegwespSE = trueSeList[[chosenNetInd]][3]
# caterpillar?
gwespPlot = ggplot( data = gwespPlotOrd,
aes( x = 1:nrow(gwespPlotOrd), y = gwesp, col = missModel)) +
geom_errorbar(aes (ymin = (gwesp - 1.96*SE), ymax = (gwesp + 1.96*SE), width = 0.4)) +
xlab("") +
ylab("GWESP estimate (95% Confidence Int)") +
labs(col = "missType") +
geom_hline(yintercept = truegwesp, col = "darkblue") +
geom_hline(yintercept = 0, col = "black", lty = 2) +
geom_point() +
ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, sep = "")) +
theme_classic() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
# legend.position = "none") +
geom_rect(aes(xmin = -Inf,
xmax = Inf,
ymin = (truegwesp - 1.96*truegwespSE),
ymax = (truegwesp + 1.96*truegwespSE)),
alpha = 0.01,
colour = NA,
fill = "grey") +
ylim(min(gwespPlotOrd$gwesp) - 2*max(abs(gwespPlotOrd$SE)), max(gwespPlotOrd$gwesp) + 2*max(abs(gwespPlotOrd$SE)))
gwespPlot
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# code to try computing a fail rate dataset
# fixed value for the amount of estimation attempts
estAttempts = 50
# choose a proportion
chosenProp = 1
# annoyingly how the missingness is saved is missing in this set of results so...
# there should only be one way the missingness is saved, keep it as a string
chosenMiss = chosenMissValue
# subset the lists to only be a single chosen proportion
# get the index
chosenPropIndex = propMissList == chosenProp
# and subset
propMissChosenPropList = propMissList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
# these lists will always be used so...
# there should only be one missing proportion
# note that there can sometimes be more than one missingness proportion in the list...
# turn the list of missingness proportions into a single numeric value
failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))
# check to make sure there's only one proportion
if(length(failMissProp) != 1){
stop("More than one missingness proportion after subsetting")
}
# calculate the success rate
successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)
# and which miss model it is, these can technically range from 1 to 4
failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
# and put it all together
failData = data.frame(failRate = (1 - successRate),
netInd = rep(chosenNetInd, length(failMissModel)),
missModel = failMissModel,
missProp = rep(failMissProp, length(failMissModel)),
missSave = rep(chosenMiss, length(failMissModel)))
I want these functions to take the entire file list, choose specific networks and missingness proportions. The thing is, I might need a few helper functions beforehand to get the right data….
Multiple smaller functions doing very specific things are preferred over one giant function that does a lot of stuff all at once
### Function to turn the data from the extracted lists into plot-friendly format
# start from the list of coefficients and standard errors
# these can have varying rows per model because not all re-estimations converge
prepMissPlot = function(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara){
## prepMissPlot(modelResList, modelSeList, missModelList, propMissList, chosenProp, chosenPara) takes
## four different containing various estimated values and extracted indices to be used
## in making a plot-friendly dataset.
##
## Input:
## - modelResList: A k-long list containing the estimated model parameters for k re-estimated models.
##
## - modelSeList: A k-long list containing the estimated standard errors for k re-estimated models.
##
## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
##
## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
##
## - chosenProp: The specified proportion of missingness to plot. Three possible values here:
## 1 = 10%, 3 = 35%, 6 = 60%.
##
## - chosenPara: The specified parameter to plot. Three possible values here:
## "edges" for edges, "altStar" for alternating stars, "gwesp" for gwesp.
##
## Output:
## - A k x 3 dataframe containing the parameter values, standard error values, and missing model index.
## The dataframe is ordered by parameter value (smallest to largest) for each missingness model.
## Check to make sure all the lists are of the same length
listLengths = lapply(list(modelResList, modelSeList, missModelList, propMissList), FUN = length)
## A unit test that spits out an error if there's more than one unique length in the list of lengths
if( length(unique(listLengths)) != 1 ){
stop("Lists have differing lengths")
}
# subset the lists to only be a single chosen proportion
chosenPropIndex = propMissList == chosenProp
modelResChosenPropList = modelResList[chosenPropIndex]
modelSeChosenPropList = modelSeList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
# Specify indices for the chosen parameter
# doing it this way because parameter names get very lengthy
paraIndex = c(1:3)
modelParaNames = c("edges", "altStar", "gwesp")
chosenParaIndex = paraIndex[modelParaNames == chosenPara]
# making the dataset
plotData = data.frame(parameter = c(unlist(lapply(modelResChosenPropList, `[[`, chosenParaIndex) )),
SE = c(unlist(lapply(modelSeChosenPropList, `[[`, chosenParaIndex))),
missModel = factor(c(unlist(missModelChosenPropList)), levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm")))
# change to allow 4 = mnarErgm and change missModel 2 = marErgm
# order the plot data
plotOrd = plotData[order(plotData[,"parameter"]),]
# one more ordering with the missing model type
plotOrd = plotOrd[order(plotOrd[,"missModel"]),]
# return the ordered plot data
return(plotOrd)
}
## actual plotting function
missCaterpillarPlot = function(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp, chosenMiss){
## missCaterpillarPlot(plotData, truePara, trueSe, chosenPara, chosenNetInd, chosenProp) is a very specialised
## plotting function that produces a caterpillar plot to compare parameter estimates for very specific data.
##
## Input:
## - plotData: Needs to be a k x 3 data frame for k re-estimated models containing parameter values,
## standard error values, and a missing model index. Ordering is optional, but visually
## useful. Ideally from prepMissPlot().
##
## - truePara: A single float (can technically be integer, but unlikely). Represents the true model estimate.
##
## - trueSe: A single float (can technically be integer, but unlikely). Represents the true standard error.
##
## - chosenPara: A string with three (but really two) options. The chosen string is the parameter that is
## plotted. The options, in order, are "edges", "altStar", and "gwesp"
##
## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
## datasets from UCINet chosen for the current round of re-estimations.
##
## - chosenProp: A single integer or float with three options representing the proportion of missingness.
## Options are 1 = 10%, 3.5 = 35%, 6 = 60%.
##
## - chosenMiss: A value to indicate what missingness is saved as (e.g., NA is Peter, 0 is Todd).
## Input should be strings that are directly used for the label.
##
## Output:
## - The output should be a caterpillar plot containing with k datapoints for k re-estimated models.
## Colours are labelled and represent the missingness model used. True values are represented as a
## grey rectangle. All error bars or other depictions of spread are 95% confidence intervals. Plot
## title should also be adaptive to the parameters in the function.
# requires ggplot2 to work
require(ggplot2)
# caterpillar?
caterPlot = ggplot( data = plotData,
aes( x = 1:nrow(plotData), y = parameter, col = missModel)) +
geom_errorbar(aes (ymin = (parameter - 1.96*SE), ymax = (parameter + 1.96*SE), width = 0.4)) +
xlab("") +
ylab(paste(chosenPara,"estimate (95% Confidence Int)", sep = " ")) +
labs(col = "missType") +
geom_hline(yintercept = truePara, col = "darkblue") +
geom_hline(yintercept = 0, col = "black", lty = 2) +
geom_point() +
ggtitle(paste("Diagnostics - Net", chosenNetInd, " - Prop", chosenProp, " - Miss", chosenMiss ,sep = "")) +
theme_classic() +
theme(axis.ticks.x = element_blank(),
axis.text.x = element_blank()) +
geom_rect(aes(xmin = -Inf,
xmax = Inf,
ymin = (truePara - 1.96*trueSe),
ymax = (truePara + 1.96*trueSe)),
alpha = 0.01,
colour = NA,
fill = "grey") +
ylim(min(min(plotData$parameter) - 2*max(abs(plotData$SE)), (truePara - 1.96*trueSe)),
max(max(plotData$parameter) + 2*max(abs(plotData$SE)), (truePara + 1.96*trueSe)))
# print out the plot
caterPlot
}
## function for calculating failure rate (for each datafile read in)
# a function for this because I overwrite which networks I read in for the caterpillar plots
# this function captures every combination of which missingness model, which chosen missingness, which missingness proportion, and which network.
prepFailPlot = function(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50){
## prepFailPlot(propMissList, missModelList, chosenNetInd, chosenProp, chosenMiss, estAttempts = 50) calculates
## the failure rates of re-estimations of degraded networks with specific object type requirements.
##
## Input:
## - missModelList: A k-long list containing the chosen missingness model index for k re-estimated models.
## Can technically range between 1 to 4.
##
## - propMissList: A k-long list containing the missingness proportion index for k re-estimated models.
##
## - chosenNetInd: An integer between 1 to 6 for current purposes. Each integer represents one of the six
## datasets from UCINet chosen for the current round of re-estimations.
##
## - chosenProp: A single integer or float with three options representing the proportion of missingness.
## Options are 1 = 10%, 3 = 35% (... yes, it's annoying.), 6 = 60%.
##
## - chosenMiss: A string to indicate what missingness is saved as (e.g., "NA" is Peter, "0" is Todd).
##
## Output:
## - The output should be a data frame containing the failure rate for the specified combination of network,
## proportion of missingness, how the missingness is saved, and all the missingness models in the input list.
# subset the lists to only be a single chosen proportion
# get the index
chosenPropIndex = propMissList == chosenProp
# and subset
propMissChosenPropList = propMissList[chosenPropIndex]
missModelChosenPropList = missModelList[chosenPropIndex]
# turn the list of missingness proportions into a single numeric value
failMissProp = as.numeric(names(table(unlist(propMissChosenPropList))))
# check to make sure there's only one proportion
if(length(failMissProp) != 1){
stop("More than one missingness proportion after subsetting")
}
# calculate the success rate
successRate = as.numeric(table(unlist(missModelChosenPropList))/estAttempts)
# and which miss model it is, these can technically range from 1 to 4
failMissModel = as.numeric(names(table(unlist(missModelChosenPropList))))
# and put it all together
failData = data.frame(failRate = (1 - successRate),
netInd = rep(chosenNetInd, length(failMissModel)),
missModel = failMissModel,
missProp = rep(failMissProp, length(failMissModel)),
missSave = rep(chosenMiss, length(failMissModel)))
# return the data frame
return(failData)
}
## no need for a function to plot fail rates for each combination since I would only want one plot
# choose a network
chosenNetInd = 2
# generate relative bias and relative variance (...standard error?) data structures
relBiasData = data.frame(rBias = c(), netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())
relSeData = data.frame(rSe = c(), netInd = c(), missSave = c(), propMiss = c(), missModel = c(), paraName = c())
# select a parameter
modelParaOptions = c("edges", "altStar", "gwesp")
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
## parameter SE missModel
## 8 -3.572767 1.424147 Indep
## 11 -1.985151 2.002649 Indep
## 16 -1.838034 2.499750 Indep
## 4 -1.770333 2.842933 Indep
## 5 -1.659360 2.455153 Indep
## 17 -1.632414 2.516041 Indep
## parameter SE missModel
## 7 -2.667740 1.1581335 Indep
## 6 -2.651687 1.1342895 Indep
## 10 -2.519181 0.8830086 Indep
## 13 -2.484591 0.9431191 Indep
## 14 -2.391916 0.9567384 Indep
## 18 -2.339140 0.8401319 Indep
## parameter SE missModel
## 15 3.713921 0.4693278 Indep
## 3 3.805494 0.5092882 Indep
## 18 3.847856 0.5059986 Indep
## 12 3.867887 0.5433778 Indep
## 17 3.937885 0.5014629 Indep
## 11 3.945033 0.5001198 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# test run for the fail rate data
# a data frame to initialise
failRateData = data.frame()
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20230814_missNetReest_net",
"20230814_missNetReest_net",
"20231010_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all chosen networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 27 -2.0450471 1.355888 Indep
## 17 -1.6887316 1.372465 Indep
## 46 -1.0782637 1.986377 Indep
## 14 -1.0079846 1.794369 Indep
## 30 -0.9901474 1.690993 Indep
## 23 -0.9677308 1.709223 Indep
## parameter SE missModel
## 29 -2.380226 0.8948844 Indep
## 34 -2.332890 0.8567828 Indep
## 39 -2.225284 0.6481276 Indep
## 12 -2.203632 0.7067738 Indep
## 25 -2.197335 0.7339564 Indep
## 19 -2.194530 0.6131555 Indep
## parameter SE missModel
## 42 2.795684 0.4150812 Indep
## 35 2.841932 0.3840148 Indep
## 5 2.906557 0.3822707 Indep
## 33 2.937962 0.3854581 Indep
## 47 2.939097 0.4309207 Indep
## 11 2.950955 0.3905500 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 13 -5.050360 NA Indep
## 9 -4.101907 0.4138049 Indep
## 12 -4.095339 0.3877220 Indep
## 5 -4.089140 0.3838097 Indep
## 2 -4.052289 0.4108890 Indep
## 14 -4.042779 0.4425365 Indep
## parameter SE missModel
## 14 -0.4271309 0.2344825 Indep
## 1 -0.3689833 0.2283937 Indep
## 9 -0.3686057 0.2258536 Indep
## 2 -0.3471691 0.2368848 Indep
## 17 -0.3043420 0.2638864 Indep
## 16 -0.2999977 0.2291898 Indep
## parameter SE missModel
## 13 0.5529485 NA Indep
## 15 1.3536200 0.3463476 Indep
## 4 1.4155391 0.3970444 Indep
## 10 1.5321071 0.3904505 Indep
## 3 1.5354860 0.3464884 Indep
## 11 1.5899620 0.3939409 Indep
## parameter SE missModel
## 33 -6.661065 1.161756 Indep
## 43 -6.520892 1.108532 Indep
## 9 -6.430122 1.040719 Indep
## 42 -6.423533 1.099057 Indep
## 40 -6.348662 1.106593 Indep
## 10 -6.331207 1.156858 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 28 -0.9007415 0.2363367 Indep
## 45 -0.7825025 0.2400551 Indep
## 44 -0.7622703 0.2476933 Indep
## 18 -0.7536412 0.2325391 Indep
## 3 -0.7459583 0.2495798 Indep
## 16 -0.7435994 0.2477373 Indep
## parameter SE missModel
## 10 1.053478 0.1945004 Indep
## 29 1.204949 0.1979167 Indep
## 19 1.224828 0.2129165 Indep
## 42 1.239121 0.2127094 Indep
## 33 1.246916 0.2216857 Indep
## 9 1.261516 0.2118016 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231004_missNetReest_net",
"20231004_missNetReest_net",
"20231004_missNetReest_net",
"20231010_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss",
"20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 7 -3.3781085 1.571063 Indep
## 6 -2.8192660 1.761062 Indep
## 5 -1.2349580 3.762309 Indep
## 10 -1.0923401 4.398689 Indep
## 2 -0.9781865 3.853383 Indep
## 8 -0.5903796 2.911597 Indep
## parameter SE missModel
## 12 -4.558152 4.043885 Indep
## 14 -3.474548 2.506201 Indep
## 13 -3.227319 2.943347 Indep
## 1 -3.187661 2.242548 Indep
## 16 -2.702853 1.253112 Indep
## 15 -2.632289 1.020986 Indep
## parameter SE missModel
## 4 3.323302 0.5682986 Indep
## 1 3.437627 0.6409324 Indep
## 14 3.484401 0.6621262 Indep
## 8 3.566714 0.6259340 Indep
## 3 3.696938 0.6449210 Indep
## 11 3.741881 0.6132945 Indep
## parameter SE missModel
## 16 -2.540926 1.468209 Indep
## 33 -2.529681 1.691315 Indep
## 35 -2.427015 1.466072 Indep
## 29 -2.020486 1.593190 Indep
## 26 -1.943041 1.608631 Indep
## 1 -1.765409 1.619538 Indep
## parameter SE missModel
## 12 -3.536047 1.825336 Indep
## 23 -3.329675 2.333945 Indep
## 18 -2.818693 1.409608 Indep
## 37 -2.728979 1.230028 Indep
## 34 -2.644825 1.234967 Indep
## 10 -2.596926 1.057250 Indep
## parameter SE missModel
## 3 2.497359 0.4892193 Indep
## 7 2.510058 0.4429165 Indep
## 30 2.617554 0.5050321 Indep
## 2 2.640901 0.5463155 Indep
## 27 2.712829 0.4962674 Indep
## 13 2.731805 0.4496286 Indep
## parameter SE missModel
## 6 -4.763305 0.5927591 Indep
## 12 -4.423675 0.5544808 Indep
## 1 -4.256021 0.5488157 Indep
## 15 -4.180141 0.5269770 Indep
## 2 -4.162467 0.5045574 Indep
## 5 -4.153906 0.5770239 Indep
## parameter SE missModel
## 6 -0.6946950 0.2194906 Indep
## 5 -0.6897158 0.2631167 Indep
## 15 -0.6294828 0.2489500 Indep
## 12 -0.5802322 0.2540797 Indep
## 1 -0.5784914 0.2739039 Indep
## 10 -0.5771192 0.2790234 Indep
## parameter SE missModel
## 4 1.137951 0.4996599 Indep
## 8 1.178245 0.4959795 Indep
## 14 1.329230 0.3960599 Indep
## 7 1.429522 0.4758393 Indep
## 13 1.560362 0.4546985 Indep
## 17 1.770557 0.5469616 Indep
## parameter SE missModel
## 9 -6.980429 1.139007 Indep
## 11 -6.766356 1.286395 Indep
## 45 -6.555106 1.242611 Indep
## 48 -6.540552 1.100341 Indep
## 25 -6.434793 1.285338 Indep
## 41 -6.358277 1.639629 Indep
## parameter SE missModel
## 14 -1.277323 0.3909004 Indep
## 23 -1.185686 0.2943182 Indep
## 32 -1.167117 0.2680503 Indep
## 31 -1.159366 0.3823150 Indep
## 16 -1.136381 0.3262691 Indep
## 7 -1.063438 0.2624494 Indep
## parameter SE missModel
## 41 0.8504767 0.2874547 Indep
## 36 0.9067248 0.2625241 Indep
## 50 1.1343697 0.3153175 Indep
## 18 1.2385265 0.2613634 Indep
## 22 1.2590632 0.3627478 Indep
## 27 1.2599829 0.3063573 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231011_missNetReest_net",
"20231011_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 5 -5.789352 2.062231 Indep
## 20 -4.598751 1.850594 Indep
## 8 -3.903931 2.399643 Indep
## 12 -3.574117 2.122361 Indep
## 9 -2.868342 1.920461 Indep
## 21 -1.942612 1.960238 Indep
## parameter SE missModel
## 7 -12.189565 14.285280 Indep
## 10 -8.636441 7.301452 Indep
## 1 -7.742465 8.269611 Indep
## 3 -7.435589 7.489878 Indep
## 14 -5.732763 5.862388 Indep
## 2 -3.727859 4.052053 Indep
## parameter SE missModel
## 4 2.565689 0.6949242 Indep
## 21 2.737034 0.7987535 Indep
## 6 2.812536 0.7580205 Indep
## 16 3.077702 0.9303100 Indep
## 18 3.192135 0.7561982 Indep
## 19 3.204291 0.7683349 Indep
## parameter SE missModel
## 6 -9.066671 NA Indep
## 35 -3.983056 1.482096 Indep
## 25 -3.688016 1.425409 Indep
## 10 -2.518948 1.491961 Indep
## 24 -2.343804 2.379993 Indep
## 28 -2.265829 2.084995 Indep
## parameter SE missModel
## 13 -10.600876 9.199102 Indep
## 12 -7.608177 5.976598 Indep
## 4 -6.967102 4.550087 Indep
## 8 -5.107308 3.390178 Indep
## 32 -3.863878 3.145042 Indep
## 16 -3.732614 2.720044 Indep
## parameter SE missModel
## 6 1.157657 NA Indep
## 11 1.498300 0.8333988 Indep
## 26 1.608398 0.7615361 Indep
## 17 2.343988 0.8712003 Indep
## 1 2.363417 0.6172139 Indep
## 14 2.388523 0.5638172 Indep
## parameter SE missModel
## 18 -7.915567 NA Indep
## 10 -6.006189 NA Indep
## 4 -5.514273 1.1385876 Indep
## 15 -5.061362 0.9744425 Indep
## 13 -4.919149 1.1736797 Indep
## 8 -4.631797 0.8727901 Indep
## parameter SE missModel
## 4 -1.0541759 0.1900847 Indep
## 13 -0.8025529 0.2567004 Indep
## 19 -0.7912228 0.2738069 Indep
## 15 -0.7703994 0.2574297 Indep
## 2 -0.7660896 0.3085943 Indep
## 11 -0.7373803 0.3518431 Indep
## parameter SE missModel
## 10 0.7233999 NA Indep
## 6 0.8481073 0.6061540 Indep
## 1 1.6349917 0.7578088 Indep
## 12 1.8921170 0.7662248 Indep
## 20 1.9075172 0.8669149 Indep
## 11 1.9556487 0.6164461 Indep
## parameter SE missModel
## 4 -8.649119 2.080996 Indep
## 26 -8.272475 2.215487 Indep
## 11 -7.824375 1.694336 Indep
## 47 -7.136581 2.073979 Indep
## 32 -6.928534 1.651045 Indep
## 33 -6.845862 1.545135 Indep
## parameter SE missModel
## 44 -2.062308 0.9548628 Indep
## 41 -1.750173 1.2046102 Indep
## 10 -1.622288 0.7202489 Indep
## 14 -1.518258 0.6462700 Indep
## 28 -1.463635 0.9698189 Indep
## 1 -1.456279 0.4954156 Indep
## parameter SE missModel
## 47 0.6587452 0.4476198 Indep
## 26 0.6732766 0.4701941 Indep
## 11 0.8695871 0.5348307 Indep
## 4 0.9417435 0.4040563 Indep
## 9 1.0394754 0.4892135 Indep
## 20 1.0694637 0.5600053 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231012_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 8 -2.601463 1.256855 Indep
## 24 -2.413046 1.108934 Indep
## 10 -2.395926 1.374799 Indep
## 15 -2.383965 1.253623 Indep
## 9 -2.176266 1.247728 Indep
## 38 -2.170672 1.111399 Indep
## parameter SE missModel
## 5 -2.209327 0.5979698 Indep
## 23 -2.189741 0.6877216 Indep
## 30 -2.066400 0.5400037 Indep
## 4 -2.026562 0.4440535 Indep
## 13 -1.986430 0.4411822 Indep
## 39 -1.977470 0.4192232 Indep
## parameter SE missModel
## 2 2.409912 0.2867801 Indep
## 25 2.416764 0.2709456 Indep
## 3 2.762928 0.3026043 Indep
## 31 2.874716 0.3174043 Indep
## 26 2.911591 0.3013970 Indep
## 6 2.946111 0.3113748 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 2 -1.6490952 1.703432 Indep
## 9 -1.4773923 1.747493 Indep
## 23 -1.3377862 1.848695 Indep
## 19 -0.9747808 1.645787 Indep
## 12 -0.8558571 2.243823 Indep
## 5 -0.7947030 2.103760 Indep
## parameter SE missModel
## 21 -2.568440 1.1667600 Indep
## 14 -2.378623 0.8922353 Indep
## 36 -2.341166 0.9318989 Indep
## 8 -2.336268 1.1878642 Indep
## 46 -2.330456 0.8169489 Indep
## 48 -2.304476 1.1932852 Indep
## parameter SE missModel
## 5 1.549235 0.3025049 Indep
## 29 1.710355 0.3314923 Indep
## 24 1.878453 0.3544091 Indep
## 44 1.900765 0.3554659 Indep
## 18 2.099150 0.3389286 Indep
## 39 2.122251 0.3851450 Indep
## parameter SE missModel
## 29 -1.6332365 1.153097 Indep
## 48 -1.3210907 1.395869 Indep
## 22 -0.9199276 1.420857 Indep
## 18 -0.7142808 1.432538 Indep
## 12 -0.6703803 1.449203 Indep
## 10 -0.6670701 1.636150 Indep
## parameter SE missModel
## 34 -2.003352 0.6859301 Indep
## 26 -1.947103 0.6863618 Indep
## 44 -1.940004 0.6037775 Indep
## 38 -1.911503 0.5101568 Indep
## 30 -1.903417 0.4739233 Indep
## 6 -1.901365 0.7485541 Indep
## parameter SE missModel
## 48 1.591746 0.3083361 Indep
## 4 1.612123 0.2829673 Indep
## 18 1.694798 0.2803756 Indep
## 39 1.788530 0.2972528 Indep
## 26 1.793877 0.2787256 Indep
## 6 1.799936 0.2751293 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 6 -4.264797 0.3877630 Indep
## 4 -4.192364 0.3555877 Indep
## 11 -4.155292 0.3629730 Indep
## 15 -4.110912 0.3622067 Indep
## 3 -4.093634 0.3679711 Indep
## 14 -4.090357 0.3716855 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 1 -0.1933919 0.2208256 Indep
## 11 -0.1928590 0.2280006 Indep
## 7 -0.1823017 0.2226638 Indep
## 14 -0.1312494 0.2360561 Indep
## 15 -0.1227517 0.2318603 Indep
## 13 -0.1205444 0.2556446 Indep
## parameter SE missModel
## 6 0.895099 0.2581558 Indep
## 9 1.169641 0.3098932 Indep
## 8 1.205350 0.3021720 Indep
## 5 1.214879 0.3145655 Indep
## 10 1.268217 0.3100610 Indep
## 4 1.294713 0.3410631 Indep
## parameter SE missModel
## 9 -7.562327 1.097650 Indep
## 5 -7.320652 1.176731 Indep
## 20 -7.263994 1.127838 Indep
## 33 -7.255869 1.161143 Indep
## 2 -7.248503 1.104335 Indep
## 50 -7.185518 1.135164 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 49 -0.5511893 0.2380867 Indep
## 28 -0.5461336 0.2205140 Indep
## 18 -0.5001765 0.2197595 Indep
## 6 -0.4852246 0.2300400 Indep
## 43 -0.4411331 0.2405094 Indep
## 41 -0.4401505 0.2326350 Indep
## parameter SE missModel
## 10 0.7147952 0.1443590 Indep
## 20 0.7862908 0.1544625 Indep
## 5 0.8066656 0.1520125 Indep
## 9 0.8667842 0.1670487 Indep
## 40 0.8687591 0.1599717 Indep
## 11 0.8730449 0.1547718 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231005_missNetReest_net",
"20231012_missNetReest_net",
"20231012_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 13 -3.337059 0.8859680 Indep
## 2 -2.860776 0.7802048 Indep
## 18 -2.851610 1.0974701 Indep
## 37 -2.726147 0.8829394 Indep
## 9 -2.508242 0.9298530 Indep
## 3 -2.472002 0.8998251 Indep
## parameter SE missModel
## 15 -1.436287 0.4408012 Indep
## 44 -1.234459 0.5375122 Indep
## 4 -1.200908 0.4771346 Indep
## 38 -1.151209 0.4432072 Indep
## 12 -1.119609 0.3651799 Indep
## 33 -1.107068 0.5208508 Indep
## parameter SE missModel
## 13 0.8143432 0.1433313 Indep
## 22 0.9230283 0.1460986 Indep
## 11 0.9431998 0.1522376 Indep
## 1 0.9683054 0.1539687 Indep
## 30 0.9840596 0.1521053 Indep
## 39 1.0125468 0.1555323 Indep
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 18 -3.903673 1.139670 Indep
## 21 -2.816481 1.616045 Indep
## 19 -2.459853 1.432304 Indep
## 47 -2.184766 1.735768 Indep
## 4 -1.885313 1.392126 Indep
## 6 -1.847751 1.171954 Indep
## parameter SE missModel
## 50 -2.431769 0.9117054 Indep
## 41 -1.988648 0.9647086 Indep
## 38 -1.952683 0.7673793 Indep
## 25 -1.862815 1.0076413 Indep
## 34 -1.652086 0.6791568 Indep
## 27 -1.521875 0.7766990 Indep
## parameter SE missModel
## 18 0.1781799 0.1574842 Indep
## 21 0.3824131 0.1777333 Indep
## 19 0.4221073 0.1832613 Indep
## 9 0.4592685 0.1906219 Indep
## 47 0.4712505 0.1916285 Indep
## 5 0.5133984 0.1805051 Indep
## parameter SE missModel
## 6 -2.659930 0.8906202 Indep
## 43 -2.484435 0.9949309 Indep
## 42 -2.379924 1.0514690 Indep
## 2 -2.249203 1.0359835 Indep
## 19 -2.083134 1.2432392 Indep
## 35 -1.883939 1.1410617 Indep
## parameter SE missModel
## 29 -1.382605 0.6180631 Indep
## 8 -1.350535 0.4879766 Indep
## 24 -1.324427 0.4870290 Indep
## 13 -1.320548 0.5947146 Indep
## 20 -1.271720 0.5316526 Indep
## 11 -1.204599 0.5222904 Indep
## parameter SE missModel
## 1 0.4634877 0.1972899 Indep
## 38 0.5640863 0.1936564 Indep
## 19 0.5753617 0.2009085 Indep
## 31 0.5896167 0.2008107 Indep
## 33 0.6195691 0.1980860 Indep
## 50 0.6667230 0.2139163 Indep
## parameter SE missModel
## 2 -4.417349 0.4255926 Indep
## 1 -4.366253 0.3996069 Indep
## 3 -4.016838 0.3891978 Indep
## 6 -4.546791 0.4310100 marErgm
## 5 -4.453206 0.4022960 marErgm
## 7 -4.263979 0.3547582 marErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 3 -0.02212587 0.2536399 Indep
## 1 0.30771625 0.2589552 Indep
## 2 0.39936734 0.2535769 Indep
## 4 -0.14067356 0.2359803 marErgm
## 7 -0.04922777 0.2510923 marErgm
## 5 0.43782339 0.2521844 marErgm
## parameter SE missModel
## 2 0.6856637 0.2854993 Indep
## 1 0.8319856 0.3128379 Indep
## 3 1.2445683 0.3393180 Indep
## 6 0.4876890 0.2788660 marErgm
## 5 0.6333639 0.2932883 marErgm
## 7 1.4323621 0.3900154 marErgm
## parameter SE missModel
## 9 -9.680129 1.321978 Indep
## 24 -9.409439 1.341914 Indep
## 19 -9.138619 1.273927 Indep
## 22 -8.715590 1.274077 Indep
## 5 -8.703109 1.269927 Indep
## 14 -8.598590 1.232700 Indep
## parameter SE missModel
## 34 -0.4271915 0.2338981 Indep
## 35 -0.2108234 0.2405158 Indep
## 29 -0.1825741 0.2283651 Indep
## 39 -0.1809863 0.2414358 Indep
## 12 -0.1535835 0.2152647 Indep
## 49 -0.0967697 0.2282145 Indep
## parameter SE missModel
## 24 0.2549825 0.1382805 Indep
## 43 0.2645356 0.1409349 Indep
## 3 0.3558289 0.1495326 Indep
## 10 0.3681067 0.1433609 Indep
## 41 0.3699661 0.1499506 Indep
## 19 0.3730467 0.1402606 Indep
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Todd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1, 2, 4, 5, 6)
# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net",
"20231006_missNetReest_net",
"20231006_missNetReest_net",
"20231011_missNetReest_net",
"20231011_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss",
"20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
# make a temp rbias/rSe data structure to bind with the data frame
tempRelBias = data.frame(rBias = (testPlotData$parameter - tempTruePara)/tempTruePara,
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
tempRelSe = data.frame(rSe = (testPlotData$SE/tempTrueSe),
netInd = rep(chosenNetInd, nrow(testPlotData)),
missSave = rep(chosenMissLabel, nrow(testPlotData)),
propMiss = rep(chunkMissProp, nrow(testPlotData)),
missModel = testPlotData$missModel,
paraName = rep(chosenParaName, nrow(testPlotData)))
# bind them
relBiasData = rbind(relBiasData, tempRelBias)
relSeData = rbind(relSeData, tempRelSe)
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 31 -4.460967 0.5810290 Indep
## 17 -4.391896 0.6414831 Indep
## 21 -4.251427 0.7467956 Indep
## 34 -4.235010 0.7519899 Indep
## 18 -3.938098 0.7389818 Indep
## 1 -3.887855 0.7437958 Indep
## parameter SE missModel
## 20 -0.54170566 0.3171848 Indep
## 24 -0.31403746 0.3400528 Indep
## 7 -0.24488276 0.3682580 Indep
## 19 -0.22044346 0.2847240 Indep
## 3 -0.07633112 0.2590887 Indep
## 29 -0.05303975 0.2633457 Indep
## parameter SE missModel
## 15 0.1147738 0.1344550 Indep
## 2 0.2438565 0.1288911 Indep
## 17 0.2489000 0.1466968 Indep
## 12 0.2720479 0.1266465 Indep
## 28 0.2784234 0.1299738 Indep
## 23 0.2830477 0.1217562 Indep
## parameter SE missModel
## 34 -4.117064 0.7813483 Indep
## 8 -3.866036 0.8600851 Indep
## 22 -3.575605 0.8914025 Indep
## 2 -3.486708 1.0883342 Indep
## 41 -3.341053 1.1678912 Indep
## 35 -3.238912 0.8570279 Indep
## parameter SE missModel
## 11 -1.6951627 0.7913244 Indep
## 45 -1.0758965 0.4881540 Indep
## 16 -0.6649367 0.4877096 Indep
## 24 -0.5145593 0.5383474 Indep
## 5 -0.5060344 0.3733735 Indep
## 26 -0.3739797 0.5027186 Indep
## parameter SE missModel
## 28 0.02784400 0.1756171 Indep
## 42 0.03321241 0.2095743 Indep
## 2 0.07508869 0.1726614 Indep
## 25 0.11500553 0.1873731 Indep
## 34 0.12822292 0.1818427 Indep
## 1 0.13671639 0.1920777 Indep
## parameter SE missModel
## 21 -4.186214 0.8410843 Indep
## 15 -3.781792 0.7574983 Indep
## 39 -3.415426 0.7451268 Indep
## 30 -3.321968 0.8540362 Indep
## 40 -3.248794 0.9085335 Indep
## 14 -3.190952 0.7852334 Indep
## parameter SE missModel
## 31 -0.9245958 0.4509537 Indep
## 19 -0.7191358 0.4810619 Indep
## 17 -0.4522326 0.4244435 Indep
## 36 -0.4507429 0.4204314 Indep
## 9 -0.4244602 0.3580949 Indep
## 8 -0.3759830 0.3769310 Indep
## parameter SE missModel
## 21 -0.32176800 0.2684163 Indep
## 15 0.01944446 0.2657635 Indep
## 42 0.07392221 0.2709689 Indep
## 6 0.13021100 0.2911042 Indep
## 41 0.19774587 0.2389817 Indep
## 40 0.21448691 0.2095101 Indep
## parameter SE missModel
## 1 -4.749368 0.4281441 Indep
## 3 -4.298512 0.5375795 Indep
## 2 -4.171256 0.5552278 Indep
## 8 -17812.803613 10.6302992 marErgm
## 5 -4.781183 0.5044661 marErgm
## 4 -4.535313 0.5246310 marErgm
## parameter SE missModel
## 2 0.1902914 0.3616075 Indep
## 3 0.4397348 0.2924576 Indep
## 1 0.6479313 0.2579289 Indep
## 7 0.1221795 0.3665846 marErgm
## 6 0.3604831 0.3471392 marErgm
## 4 0.6110184 0.2724064 marErgm
## parameter SE missModel
## 1 0.3854443 3.015044e-01 Indep
## 3 0.4696144 3.230628e-01 Indep
## 2 0.8850460 4.268974e-01 Indep
## 8 -22.0702887 2.680862e+115 marErgm
## 5 0.1396263 3.000309e-01 marErgm
## 4 0.2854597 2.927772e-01 marErgm
## parameter SE missModel
## 4 -11.67967 1.761618 Indep
## 49 -11.60233 1.668491 Indep
## 19 -10.86087 1.509740 Indep
## 33 -10.81512 1.729544 Indep
## 34 -10.27515 1.710123 Indep
## 26 -10.15169 1.589956 Indep
## parameter SE missModel
## 41 -0.29712466 0.2683588 Indep
## 33 -0.20487467 0.2645884 Indep
## 24 -0.18751920 0.2627259 Indep
## 12 -0.14089183 0.2527441 Indep
## 2 0.01010340 0.2545368 Indep
## 5 0.02821066 0.2418109 Indep
## parameter SE missModel
## 50 -0.164454094 0.2209245 Indep
## 25 -0.131737953 0.1781773 Indep
## 18 -0.093227217 0.2367062 Indep
## 29 -0.082722105 0.2330193 Indep
## 42 -0.040174880 0.2351467 Indep
## 31 0.006732098 0.1901246 Indep
The plots below are specifically plots with very sparse amounts of successful re-estimations.
# 10% missingness - Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 1
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)
# note:
# net 3 is so messed up it has ONE converged estimation for this set
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 0.06730704 2.831189 mnarErgm
## 6 0.41110006 2.700997 mnarErgm
## 10 0.44803684 3.233691 mnarErgm
## 7 0.62303532 3.283356 mnarErgm
## 8 1.28406049 3.326750 mnarErgm
## 9 1.40853051 3.443864 mnarErgm
## parameter SE missModel
## 5 -3.012581 1.1944335 mnarErgm
## 3 -2.811731 1.0764746 mnarErgm
## 11 -2.705572 0.9157626 mnarErgm
## 2 -2.677974 0.9916606 mnarErgm
## 4 -2.656809 0.9220835 mnarErgm
## 9 -2.590910 0.8378803 mnarErgm
## parameter SE missModel
## 5 3.275134 0.3421808 mnarErgm
## 9 3.344275 0.3413299 mnarErgm
## 2 3.398827 0.3541200 mnarErgm
## 8 3.406120 0.3646437 mnarErgm
## 3 3.407984 0.3722419 mnarErgm
## 4 3.423570 0.3405085 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# the 'converged' model for net 3
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = 3
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)
# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
## parameter SE missModel
## 1 -89.26956 NA Indep
## parameter SE missModel
## 1 21.2504 NA Indep
## parameter SE missModel
## 1 2.785266 NA Indep
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
# 35% missingness
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5 # specifically for 35% missingness because I regex by taking the first number next to 'prop'.
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)
# note:
# net 3 is still so messed up it has three converged estimation for this set (out of 200)
# 20231010_missNetReest_net3_missModel2_prop3.5_trial11.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial12.RData
# 20231010_missNetReest_net3_missModel3_prop3.5_trial19.RData
# no mnars, so the loop won't work.
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231011_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 3 1.354497 2.909963 mnarErgm
## 18 2.906806 2.973310 mnarErgm
## 13 2.981297 3.469670 mnarErgm
## 9 3.067972 3.937823 mnarErgm
## 2 3.332524 3.476768 mnarErgm
## 4 3.417883 3.512137 mnarErgm
## parameter SE missModel
## 10 -4.981270 1.855869 mnarErgm
## 20 -4.887841 1.760132 mnarErgm
## 6 -4.644249 1.794332 mnarErgm
## 8 -4.517936 1.729852 mnarErgm
## 16 -4.301038 1.535839 mnarErgm
## 11 -4.210703 1.294873 mnarErgm
## parameter SE missModel
## 12 1.962097 0.2659402 mnarErgm
## 16 2.015492 0.2764007 mnarErgm
## 11 2.038997 0.2944418 mnarErgm
## 19 2.079809 0.3464684 mnarErgm
## 7 2.083504 0.3216842 mnarErgm
## 6 2.096097 0.2653546 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = 3
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231010_missNetReest_net", chosenNetInd, sep =""), outputList)
# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
}
## parameter SE missModel
## 1 -59.62898 NA marErgm
## 2 -31.73605 NA Latent
## 3 -17.05925 NA Latent
## parameter SE missModel
## 1 15.661456 NA marErgm
## 2 1.350935 NA Latent
## 3 5.424564 NA Latent
## parameter SE missModel
## 1 -0.2770482 NA marErgm
## 3 -1.1609499 NA Latent
## 2 12.4001035 NA Latent
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
# 60% Peter
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "Peter"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 1
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = c(1)
# note:
# net 3 is still so messed up it has two converged estimation for this set (out of 200)
# 20231011_missNetReest_net3_missModel2_prop6_trial40
# 20231011_missNetReest_net3_missModel3_prop6_trial46
# no mnars, so the loop won't work.
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 -19.471644 NA Indep
## 2 -5.164890 1.341253 Indep
## 3 -12.764592 NA marErgm
## 12 -4.443987 1.069731 mnarErgm
## 8 -4.212305 1.364534 mnarErgm
## 26 -4.030651 1.201752 mnarErgm
## parameter SE missModel
## 2 -1.676416 0.2491544 Indep
## 1 3.753020 NA Indep
## 3 2.523763 NA marErgm
## 24 -16.309377 12.3213518 mnarErgm
## 31 -7.352189 4.5866855 mnarErgm
## 13 -6.388176 3.7709916 mnarErgm
## parameter SE missModel
## 1 2.895285 NA Indep
## 2 4.855019 0.6582781 Indep
## 3 1.836877 NA marErgm
## 20 1.610596 0.6224760 mnarErgm
## 30 1.847450 0.3734943 mnarErgm
## 24 1.860921 0.2396976 mnarErgm
### A section specifically for the three re-estimations of net3 that made it
# no MNAR files, so removing them from the code below
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = 3
# file list for the chosen network index
chosenNetOutFiles = grep(paste("20231011_missNetReest_net", chosenNetInd, sep =""), outputList)
# loop to load
chosenNetOutList = outputList[chosenNetOutFiles]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
## parameter SE missModel
## 1 -5.022512 NA marErgm
## 2 -62.259251 NA Latent
## parameter SE missModel
## 1 2.548622 NA marErgm
## 2 17.492617 NA Latent
## parameter SE missModel
## 1 -1.027786 NA marErgm
## 2 -2.287233 NA Latent
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
Literally nothing. No successful re-estimations for any missingness model for net 3.
# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 3
chunkMissPropFloat = 3.5
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231005_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissPropFloat,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 -2.2089033 1.681170 Indep
## 2 -2.7527563 1.471588 Latent
## 3 0.9039891 1.649547 mnarErgm
## parameter SE missModel
## 1 -1.112195 0.5174151 Indep
## 2 -1.248764 0.4822099 Latent
## 3 -1.302105 0.4873029 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
## parameter SE missModel
## 1 2.437951 0.4559972 Indep
## 2 2.961205 0.5207720 Latent
## 3 1.119011 0.2363328 mnarErgm
## Warning: Removed 1 rows containing missing values (`geom_hline()`).
# 35% missingness - Todd
# loading in the output results
outputList = list.files(here("Output", "20230825_simMissReest", "TOdd"))
# applying the code above to different reestimations
# TODO: specify the missingness saved for current chunk, 1 = "NA"/peter, 2 = "0"/Todd
chunkMissSave = 2
# TODO: specify the chosen missingness proportion for each chunk (1/3 or 3.5/6)
chunkMissProp = 6
# take out the index for the chosen missingness saved value
chosenMissLabel = missSaveLabel[chunkMissSave]
chosenMissValue = missSaveValue[chunkMissSave]
# now looping with a specific vector of applicable networks
# TODO: specify the UCINet networks for the chunk, these are networks with enough re-estimations
chunkNetworks = 3
# note:
# net 3 is so messed up it has ONE converged estimation for this set
# 20231010_missNetReest_net3_missModel1_prop1_trial9.RData
# handle fail rates separately. these data structures are very unstandardised.
# TODO: vector of file names for regex
chunkRdataNames = c("20231006_missNetReest_net")
chunkMnarNames = c("20231012_missErgmNetReest_Miss")
# vector of parameter names to loop over
modelParaOptions = c("edges", "altStar", "gwesp")
# loop for all specified networks
for(chunkInd in 1:length(chunkNetworks)){
# make sure the re-estimation lists are empty in the beginning
modelResList = list()
modelSeList = list()
missModelList = list()
propMissList = list()
# set a specific index for the network
chosenNetInd = chunkNetworks[chunkInd]
# file list for the chosen network index
chosenNetOutFiles = grep(paste(chunkRdataNames[chunkInd], chosenNetInd, sep =""), outputList)
## regex the MNAR files
chosenMnarOutFiles = grep(paste(chunkMnarNames[chunkInd], chosenMissLabel, "_net", chosenNetInd, sep = ""), outputList)
# combine the two indices
combinedNetOutList = c(chosenNetOutFiles, chosenMnarOutFiles)
# check to make sure there aren't any duplicates
if( length(unique(combinedNetOutList)) != length(combinedNetOutList)){
stop("Duplicate re-estimation datafiles! BAD.")
}
# loop to load
chosenNetOutList = outputList[combinedNetOutList]
for(fileInd in 1:length(chosenNetOutList)){
# load all files
load(here("Output", "20230825_simMissReest", chosenMissLabel, chosenNetOutList[fileInd]))
# suppress these version difference warnings.
suppressWarnings({
modelResList[[fileInd]] = modelres$coefficients
modelSeList[[fileInd]] = summary(modelres)$coefficients[,2]
})
# slight difference for the chosenMnarOutFiles because missModelList[[fileInd]] = always 4 (let 2 be mar ergm, 4 be mnar ergm)
# this branch *should* be setting the miss model to 4 if they were regexed to be a part of the mnar list.
if(combinedNetOutList[fileInd] %in% chosenMnarOutFiles){
missModelList[[fileInd]] = 4
} else {
missModelList[[fileInd]] = as.integer(sub(".*?_missModel.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# propmiss should be the same
propMissList[[fileInd]] = as.integer(sub(".*?_prop.*?(\\d+).*", "\\1", chosenNetOutList[[fileInd]]))
}
# this can be a loop!
for( chosenPara in 1:length(modelParaOptions)){
# grab the chosen parameter name
chosenParaName = modelParaOptions[chosenPara]
# still need to take out true parameter values
tempTruePara = trueCoefList[[chosenNetInd]][chosenPara]
tempTrueSe = trueSeList[[chosenNetInd]][chosenPara]
# including edges
# test run of the plot function
testPlotData = prepMissPlot(modelResList = modelResList,
modelSeList = modelSeList,
missModelList = missModelList,
propMissList = propMissList,
chosenProp = chunkMissProp,
chosenPara = chosenParaName)
# brief inspection just to make sure nothing's wrong
head(testPlotData) %>%
print()
# and plot it
missCaterpillarPlot(plotData = testPlotData,
truePara = tempTruePara,
trueSe = tempTrueSe,
chosenPara = chosenParaName,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue) %>%
print()
}
# a temporary data frame from the output to be binded to the big data frame
tempFailData = prepFailPlot(propMissList = propMissList,
missModelList = missModelList,
chosenNetInd = chosenNetInd,
chosenProp = chunkMissProp,
chosenMiss = chosenMissValue)
# and bind it
failRateData = rbind(failRateData, tempFailData)
}
## parameter SE missModel
## 1 -3.962095 0.9023328 marErgm
## 2 -1.183762 1.5346827 Latent
## 3 -14.485997 NA mnarErgm
## parameter SE missModel
## 1 0.6043238 0.3158800 marErgm
## 2 -0.3741873 0.4741993 Latent
## 3 -1.6860124 NA mnarErgm
## parameter SE missModel
## 1 0.1417893 0.1547958 marErgm
## 2 0.5251103 0.1820051 Latent
## 3 9.7456273 NA mnarErgm
# plotting the failure rate
# data frame for the completely failed missingness combinations
# just for the formatting
tempFailData = failRateData[1,]
# todd 10s
tempFailData[1, ] = c(1, 3, 1, 1, "0")
tempFailData[2, ] = c(1, 3, 2, 1, "0")
tempFailData[3, ] = c(1, 3, 3, 1, "0")
tempFailData[4, ] = c(1, 3, 4, 1, "0")
# peter 10s
tempFailData[5, ] = c(1, 1, 1, 1, "NA")
tempFailData[6, ] = c(1, 1, 2, 1, "NA")
tempFailData[7, ] = c(1, 1, 3, 1, "NA")
tempFailData[8, ] = c(1, 3, 2, 1, "NA")
tempFailData[9, ] = c(1, 3, 3, 1, "NA")
tempFailData[10, ] = c(1, 3, 4, 1, "NA")
# todd 35s
tempFailData[11, ] = c(1, 3, 2, 3, "0")
# peter 35s
tempFailData[12, ] = c(1, 1, 1, 3, "NA")
tempFailData[13, ] = c(1, 1, 2, 3, "NA")
tempFailData[14, ] = c(1, 1, 3, 3, "NA")
tempFailData[15, ] = c(1, 3, 1, 3, "NA")
tempFailData[16, ] = c(1, 3, 4, 3, "NA")
# todd 60
tempFailData[17, ] = c(1, 3, 1, 6, "0")
# peter 60
tempFailData[18, ] = c(1, 1, 3, 6, "NA")
tempFailData[19, ] = c(1, 3, 1, 6, "NA")
tempFailData[20, ] = c(1, 3, 4, 6, "NA")
# object type stuff
tempFailData$failRate = as.numeric(tempFailData$failRate)
tempFailData$netInd = as.numeric(tempFailData$netInd)
tempFailData$missModel = as.numeric(tempFailData$missModel)
tempFailData$missProp = as.numeric(tempFailData$missProp)
# and bind
failRateData = rbind(failRateData, tempFailData)
# check for duplicates
if(any(duplicated(failRateData))){
# take only distinct elements
failRateData = distinct(failRateData)
}
# give the fail rate data (keep it untouched) a unique variable for the combination of missingness
failRatePlotData = failRateData %>%
group_by(missProp, missSave) %>%
mutate(missCondition = cur_group_id()) %>%
ungroup()
# turn variables into factors for plotting
failRatePlotData$missModel = factor(failRatePlotData$missModel, levels = c(1,2,3,4), labels = c("Indep", "marErgm", "Latent", "mnarErgm"))
failRatePlotData$missCondition = factor(failRatePlotData$missCondition, levels = c(1,2,3,4,5,6), labels = c("Todd10", "Peter10", "Todd35", "Peter35", "Todd60", "Peter60"))
# subsetting per network because between-network comparisons aren't too informative
# within network comparisons where I have a specific group index for every combination of missSave and missProp (6 categories total)
selectedNets = c(1,2,3,4,5,6)
for(chosenNetInd in selectedNets){
# start with a subset of net 2
failRateSubset = failRatePlotData %>%
filter(netInd == chosenNetInd)
# and now we have plot-ready data
print(ggplot(failRateSubset, aes(fill = missModel, y = failRate, x = missCondition)) +
geom_bar(position="dodge", stat="identity") +
xlab("Missingness condition") +
ylab("Failure rate") +
ggtitle(paste("Failure rate - Net", chosenNetInd)) +
theme_classic() +
theme(axis.ticks.x = element_blank()) +
ylim(0,1) )
}
Relative bias was defined as:
\[rBias = \frac{\widetilde{\theta} - \theta}{\theta},\]
with \(\widetilde{\theta}\) representing the re-estimated parameter value and \(\theta\) representing the true parameter value.
The relative standard error is defined as: \[rSe = \frac{SE(\widetilde{\theta})}{SE(\theta)}.\]
# change some variable types
relBiasData$netInd = as.factor(relBiasData$netInd)
relSeData$netInd = as.factor(relSeData$netInd)
missPropChoices = c(1, 3, 6)
paraChoices = c("edges", "altStar", "gwesp")
# loop it
for(chosenProp in missPropChoices){
for(chosenPara in paraChoices){
# I don't even know what kind of plot this is...
# but I need to do some wrangling and subsetting.
# need to subset some stuff
relBiasSub = relBiasData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# use the 2.5% and 97.5% quantiles for the axis limits because some of them have some VERY extreme values
tempBiasBounds = c(quantile(relBiasSub$rBias, probs = c(0.025, 0.975)))
# so the plot would be something like
relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 0, col = "black", lty = 2) +
ylim((tempBiasBounds + c(-0.5, 0.5))) + # and add a bit of wiggle room
xlab("Network no.") +
ylab("Relative bias value") +
ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relBiasPlot)
# and another one for variance (technically relative standard error... but yeah)
relSeSub = relSeData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# just the 97.5% quantile because this is strictly positive
tempVarUpper = quantile(relSeSub$rSe, probs = c(0.975), na.rm = TRUE)
# so the plot would be something like
relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 1, col = "black", lty = 2) +
ylim(0, tempVarUpper + 0.5) + # some wiggle room
xlab("Network no.") +
ylab("Relative variance value") +
ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relSePlot)
}
}
## Warning: Removed 40 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 36 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 5 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 47 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 25 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 51 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 26 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 19 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 38 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 44 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 35 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 50 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 45 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 23 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 76 rows containing non-finite values (`stat_boxplot()`).
# a repeat without the imposed axes just for comparison
for(chosenProp in missPropChoices){
for(chosenPara in paraChoices){
# I don't even know what kind of plot this is...
# but I need to do some wrangling and subsetting.
# need to subset some stuff
relBiasSub = relBiasData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# so the plot would be something like
relBiasPlot = ggplot(relBiasSub, aes(x = netInd, y = rBias, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 0, col = "black", lty = 2) +
xlab("Network no.") +
ylab("Relative bias value") +
ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relBiasPlot)
# and another one for variance (technically relative standard error... but yeah)
relSeSub = relSeData %>%
filter(propMiss == chosenProp & paraName == chosenPara)
# so the plot would be something like
relSePlot = ggplot(relSeSub, aes(x = netInd, y = rSe, fill = missModel)) +
geom_boxplot(position = position_dodge()) +
facet_wrap(~missSave) +
theme_classic() +
geom_hline(yintercept = 1, col = "black", lty = 2) +
xlab("Network no.") +
ylab("Relative variance value") +
ggtitle(paste("Proportion", ifelse(chosenProp == 3, yes = 3.5, no = chosenProp), "- ", chosenPara)) # the ifelse is because of how I've regexed 35% missingness
# print plot
print(relSePlot)
}
}
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 6 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 9 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 17 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 49 rows containing non-finite values (`stat_boxplot()`).